## Base RF model
set.seed(42)
library(ggplot2) # for autoplot() generic
library(dplyr)
library(ggpubr)
library(ggcorrplot)
library(sf)
library(tmap)
library(car)
dat_sf <- st_read("./data/glacier_clim.shp")
## Reading layer `glacier_clim' from data source `/Users/u0784726/Dropbox/Data/devtools/glacier_mb/data/glacier_clim.shp' using driver `ESRI Shapefile'
## Simple feature collection with 95086 features and 30 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: 67.47845 ymin: 27.4918 xmax: 103.8915 ymax: 45.35089
## Geodetic CRS: GCS_unknown
my_vars <- c("mb_mwea", "area_km2", "z_med", "hi", "z_aspct", "z_slope", "tau",
"t2m_18", "t2m_d", "tp_18", "tp_d", "tpjja_18")
dat <- dat_sf %>%
st_drop_geometry() %>%
dplyr::select(any_of(my_vars))
dat_cor <- cor(dat)
ggcorrplot(dat_cor)
Variance inflation factors (\(>5\) is a conservative threshold for multicollinearity). The high value for the seasonality is probably from the correlation with general precipitation values.
fit <- lm(mb_mwea ~ ., dat)
vif(fit)
## area_km2 z_med hi z_aspct z_slope tau t2m_18 t2m_d
## 1.343290 1.741905 1.097497 1.008888 1.349042 1.924074 2.528093 1.144644
## tp_18 tp_d tpjja_18
## 4.384053 1.794277 6.292842
for(i in my_vars) {
p1 <- gghistogram(dat, i)
print(p1)
}
Possible variables for log-transform:
area_km2z_slopetautp_18tpjja_18dat$larea_km2 <- log(dat$area_km2)
p1 <- gghistogram(dat, "larea_km2")
print(p1)
dat$lz_slope <- log(dat$z_slope)
p1 <- gghistogram(dat, "lz_slope")
print(p1)
dat$ltau <- log(dat$tau)
p1 <- gghistogram(dat, "ltau")
print(p1)
dat$ltp_18 <- log(dat$tp_18)
p1 <- gghistogram(dat, "ltp_18")
print(p1)
dat$ltpjja_18 <- log(dat$tpjja_18)
p1 <- gghistogram(dat, "ltpjja_18")
print(p1)
for(i in my_vars) {
m1 <- tm_shape(dat_sf) +
tm_symbols(col = i,
size = 0.25,
alpha = 0.75,
style = "quantile")
print(m1)
}